# -*- coding: utf-8 -*-

"""
DateTime   : 2021/02/16 10:10
Author     : ZhangYafei
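Description: Fetch the Douban Books home page and extract book entries (link, title, author, year, publisher, abstract) with a regular expression.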
"""
import re

import requests

# Send a desktop-Chrome User-Agent with the request.
# NOTE(review): presumably the site rejects the default python-requests
# User-Agent -- confirm before removing this header.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36'}

# Without a timeout, requests waits indefinitely on a stalled connection;
# bound the wait so the script fails fast instead of hanging.
response = requests.get('https://book.douban.com/', headers=headers, timeout=10)
content = response.text

# Show the response object (its repr includes the HTTP status code).
print(response)

# One match per <li> book entry, capturing six groups in order:
# href, title, author, year, publisher and the text following "abstract"
# up to the closing </p>. DOTALL lets "." span the newlines inside an entry.
pattern = re.compile(
    '<li.*?cover.*?href="(.*?)".*?title="(.*?)".*?more-meta.*?author>(.*?)</span>.*?year>(.*?)</span>.*?publisher>(.*?)</span>.*?abstract(.*?)</p>.*?</li>',
    flags=re.DOTALL,
)

# List of 6-tuples, one per matched entry in the downloaded page.
results = pattern.findall(content)

print(results)


# Whitespace matcher, compiled once because it is applied to several fields
# of every entry. The raw string matters: a plain '\s' literal is an invalid
# escape sequence and triggers a warning on current Python versions.
_whitespace = re.compile(r'\s+')

for result in results:
    print(result)
    url, name, author, year, publisher, abstract = result
    # Strip all whitespace from the short text fields; the URL and the
    # abstract are printed exactly as captured.
    name = _whitespace.sub('', name)
    author = _whitespace.sub('', author)
    year = _whitespace.sub('', year)
    publisher = _whitespace.sub('', publisher)
    print(url, name, author, year, publisher, abstract)
